//Do-file to create dataset for the Add Health results
//Last changed by SAO 250109

* Turn off -more- paging so the do-file runs without pausing, and start
* from an empty dataset.
set more off
clear


***MEDIATORS***
* Every mediator is built in two versions: one from wave 4 items (used with
* turnout) and a second one with the suffix _PART from wave 3 or earlier
* items (used with participation).
* Which version enters a model is decided in the analysis do-file.
use "E:\ProjData\PGS and politics\AHdata\AddHealth_Mediators.dta", clear
***Depression
* DEP averages the ten wave 4 items H4MH18 to H4MH27.
* DEP_PART (the version used with participation) averages the eight
* wave 3 items H3SP6 to H3SP13.

* Step 1: values 4 through 9 on any item are set to missing.
forvalues i = 18/27 {
    replace H4MH`i' = . if inrange(H4MH`i', 4, 9)
}
forvalues i = 6/13 {
    replace H3SP`i' = . if inrange(H3SP`i', 4, 9)
}

* Step 2: reverse-code five items (x becomes 3 - x).
* H4MH24 stays reverse-coded in memory after this point; the subjective
* wellbeing section later in this file reuses that variable.
foreach item of varlist H4MH20 H4MH24 H4MH25 H3SP7 H3SP11 {
    replace `item' = 3 - `item'
}

* Step 3: scale score = row mean of the items (rowmean skips missing items).
egen DEP = rowmean(H4MH18 H4MH19 H4MH20 H4MH21 H4MH22 H4MH23 H4MH24 H4MH25 H4MH26 H4MH27)
egen DEP_PART = rowmean(H3SP6 H3SP7 H3SP8 H3SP9 H3SP10 H3SP11 H3SP12 H3SP13)


***Physical activity
* ACTIVITY is item H4DA11 with values 2 through 8 set to missing.
* ACTIVITY_PART (used with participation) is item H2DA6 with values 4
* through 8 set to missing.
* NOTE(review): the previous comment here said wave 3, but H2DA6 carries the
* H2 prefix that this file uses for wave 2 items (e.g. H2PF17) - confirm.
gen ACTIVITY = H4DA11
gen ACTIVITY_PART = H2DA6
replace ACTIVITY = . if inrange(ACTIVITY, 2, 8)
replace ACTIVITY_PART = . if inrange(ACTIVITY_PART, 4, 8)


***Self-rated health
* SRH is built from item H4GH1, SRH_PART (used with participation) from
* item H3GH1.
* Values 6 through 98 are set to missing in BOTH items before reverse
* coding. Previously only H3GH1 was cleaned, so any H4GH1 value above 5
* would have been reverse-coded into SRH as if it were a valid answer.
* NOTE(review): the 6-98 range for H4GH1 is copied from the H3GH1 rule;
* confirm it against the wave 4 codebook.
foreach item of varlist H3GH1 H4GH1 {
    replace `item' = . if inrange(`item', 6, 98)
}
*Reverse code the items (x becomes 5 - x)
gen SRH = -1*H4GH1+5
gen SRH_PART = -1*H3GH1+5


***Neuroticism
* NEURO averages eight H4PE items; NEURO_PART (used with participation)
* is the single wave 3 item H3BM14.
local neuro_items H4PE4 H4PE6 H4PE12 H4PE14 H4PE16 H4PE20 H4PE22 H4PE28

* Values 6 through 8 on the H4PE items, and 8 through 99 on H3BM14, are
* set to missing.
foreach item of varlist `neuro_items' {
    replace `item' = . if inrange(`item', 6, 8)
}
replace H3BM14 = . if inrange(H3BM14, 8, 99)

* Reverse-code four of the H4PE items (x becomes 6 - x).
foreach item of varlist H4PE4 H4PE6 H4PE20 H4PE22 {
    replace `item' = 6 - `item'
}

egen NEURO = rowmean(`neuro_items')
gen NEURO_PART = H3BM14


***Extraversion
* EXTRA averages four H4PE items; EXTRA_PART (used with participation)
* averages three wave 2 items (H2PF17 H2PF30 H2PF31).
local extra_w4 H4PE1 H4PE9 H4PE17 H4PE25
local extra_w2 H2PF17 H2PF30 H2PF31

* Values 6 through 8 are set to missing on all seven items.
foreach item of varlist `extra_w4' `extra_w2' {
    replace `item' = . if inrange(`item', 6, 8)
}

* Reverse-code four items (x becomes 6 - x).
foreach item of varlist H4PE1 H4PE17 H2PF17 H2PF31 {
    replace `item' = 6 - `item'
}

egen EXTRA = rowmean(`extra_w4')
egen EXTRA_PART = rowmean(`extra_w2')


***Subjective wellbeing
* SWB is item H4MH24; SWB_PART (used with participation) is the wave 2
* item H2FS11. Both source variables are renamed, not copied.
* H4MH24 was already reverse-coded (x becomes 3 - x) when the depression
* score was built earlier in this file, so it is flipped once more here.
* H2FS11 is not flipped.
rename H4MH24 SWB
rename H2FS11 SWB_PART
foreach item of varlist SWB SWB_PART {
    replace `item' = . if inrange(`item', 4, 8)
}
replace SWB = 3 - SWB


***Risk attitudes
* RISK is item H4PE35 reverse-coded; RISK_PART (used with participation)
* is the wave 3 item H3SP23 reverse-coded. Reverse coding is x becomes 6 - x.
* NOTE(review): H4PE35 is cleaned of values 6 through 8, but H3SP23 only of
* 96 through 99, so an H3SP23 value between 6 and 95 would pass into
* RISK_PART - confirm the missing codes of H3SP23.
replace H4PE35 = . if inrange(H4PE35, 6, 8)
gen RISK = 6 - H4PE35

replace H3SP23 = . if inrange(H3SP23, 96, 99)
gen RISK_PART = 6 - H3SP23


***Cognitive ability (use Picture Vocabulary Test standardized score)
* Not built in this section: AH_RAW from the main data is used further down.

* Standardize all mediators.
* zscore writes its result to a new variable named z_ plus the input name;
* that variable then takes over the original name.
* NOTE(review): zscore is presumably a community-contributed command that
* has to be installed separately - confirm it is available.
local mediators DEP DEP_PART ACTIVITY ACTIVITY_PART SRH SRH_PART NEURO NEURO_PART EXTRA EXTRA_PART SWB SWB_PART RISK RISK_PART
foreach m of local mediators {
    zscore `m'
    drop `m'
    rename z_`m' `m'
}

* Keep the identifier plus the standardized mediators and park them in a
* temporary file for the merge into the main data.
keep AID `mediators'
tempfile AHMediators
save `AHMediators'






* Load the civic engagement data and keep respondents with WHITE==1.
* Only the race restriction is applied here; the DZ / full-sibling filter
* further down in this file is commented out.
use "E:\ProjData\PGS and politics\AHdata\AddHealth_CivicEng.dta", clear
* Remove the _merge variable that is already stored in the file.
drop _merge
* keep(match) retains only AIDs found in both files (what keep if _merge==3
* did); nogenerate means no _merge variable is created.
merge 1:1 AID using "E:\ProjData\PGS and politics\AHdata\AH_Race_White.dta", keep(match) nogenerate
keep if WHITE==1

rename BYR Byear
rename W4_EDUCATION EduYears
*Standardize EA
zscore EduYears
drop EduYears
rename z_EduYears EduYears
* Rescale turnout with (VOTE-1)/3.
* NOTE(review): this maps onto 0-1 only if VOTE is coded 1-4 - confirm.
replace VOTE = (VOTE-1)/3
rename VOTE TurnoutSelf
rename W4_IDEOLOGY Ideology
* Fall back on the wave 3 value when the wave 4 value is missing.
* missing() also catches extended missing values (.a to .z), which a
* comparison with the system missing value alone does not.
replace Ideology = W3_IDEOLOGY if missing(Ideology)
* Reverse the scale (x becomes 6 - x).
replace Ideology = Ideology*-1+6
* Extremism = absolute distance from the value 3, divided by 2.
gen Extremism = abs(Ideology-3)/2
* Rescale ideology with (x-1)/4.
replace Ideology = (Ideology-1)/4

*Political participation - use only items available both in STR and AH
* The three H3CC9 items get descriptive names; PolPart is their row mean
* (rowmean skips missing items).
rename (H3CC9A H3CC9B H3CC9E) (Contribute Contact Rally)
egen PolPart = rowmean(Contribute Contact Rally)


* Give the numbered PGI variables trait-based names.
* The two lists are paired by position: PGI14 becomes EA_PGI, PGI11 becomes
* CP_PGI, and so on.
local pgi_ids    14 11 03        29    33   16    25      34  12  36  01       08  20
local pgi_traits EA CP ADVENTURE NEURO RISK EXTRA MORNING SRH DEP SWB ACTIVITY BMI HEIGHT
local n_pgi : word count `pgi_ids'
forvalues k = 1/`n_pgi' {
    local id    : word `k' of `pgi_ids'
    local trait : word `k' of `pgi_traits'
    rename PGI`id' `trait'_PGI
}

* Standardize PGIs by birth year
* For each PGI a new variable with the prefix std is created:
* (value - birth-year mean) / birth-year standard deviation.
* The raw PGI variables are left unchanged.
foreach trait in EA ADVENTURE CP EXTRA MORNING NEURO RISK SRH DEP SWB ACTIVITY BMI HEIGHT {
    local pgi `trait'_PGI
    tempvar cohort_mean cohort_sd
    bysort Byear: egen `cohort_mean' = mean(`pgi')
    bysort Byear: egen `cohort_sd' = sd(`pgi')
    gen std`pgi' = (`pgi' - `cohort_mean') / `cohort_sd'
    drop `cohort_mean' `cohort_sd'
}

* Drop observations without a value on PC1. missing() also removes extended
* missing values (.a to .z), which a comparison with the system missing
* value alone would leave in.
* NOTE(review): the heading here used to announce a DZ twin / full sibling
* restriction, but that filter is the commented-out line below and is
* not applied.
drop if missing(PC1)
*keep if dz==1 | fs==1



* Attach the mediators saved in the temporary file earlier in this do-file.
* keep(master match) discards mediator rows that have no counterpart in the
* data in memory (what drop if _merge==2 did); nogenerate leaves no _merge
* variable behind.
merge 1:1 AID using `AHMediators', keep(master match) nogenerate

*Cognitive ability
* CP is the standardized AH_RAW score; AH_RAW itself is left in place.
zscore AH_RAW
rename z_AH_RAW CP


*Standardize target traits
* Each variable in the list is standardized again on the observations now
* in memory, and the result replaces the variable under its original name.
* CP is in the list as well, so it is standardized a second time.
* NOTE(review): ACTIVITY_PART is the only _PART variable in this list; the
* other _PART mediators keep the standardization computed on the mediator
* file. Confirm that this asymmetry is intended.
local targets DEP ACTIVITY ACTIVITY_PART SRH NEURO EXTRA SWB RISK CP
foreach t of local targets {
    zscore `t'
    drop `t'
    rename z_`t' `t'
}



* Final variable selection, grouped by role. AID is not in any group, so it
* is dropped here.
local outcomes TurnoutSelf PolPart Ideology Extremism Contribute Contact Rally
local traits   DEP DEP_PART ACTIVITY ACTIVITY_PART SRH SRH_PART NEURO NEURO_PART EXTRA EXTRA_PART SWB SWB_PART RISK RISK_PART CP
local pgi_std  stdEA_PGI stdCP_PGI stdEXTRA_PGI stdNEURO_PGI stdRISK_PGI stdADVENTURE_PGI stdMORNING_PGI stdSRH_PGI stdDEP_PGI stdSWB_PGI stdACTIVITY_PGI stdBMI_PGI stdHEIGHT_PGI
local covars   MALE Byear PC1-PC10 FID SCID EduYears mz
local pgi_raw  EA_PGI ADVENTURE_PGI CP_PGI EXTRA_PGI MORNING_PGI NEURO_PGI RISK_PGI SRH_PGI DEP_PGI SWB_PGI ACTIVITY_PGI BMI_PGI HEIGHT_PGI
keep `outcomes' `traits' `pgi_std' `covars' `pgi_raw'

* Rename the two identifiers and tag every row with the string ah in the
* new variable sample.
rename (FID SCID) (PairID SchoolID)
gen sample = "ah"


* Write the analysis dataset, overwriting any existing file.
save "E:\ProjData\PGS and politics/AHdata.dta", replace

